* Load the raw meta-analysis data from the worksheet named "Stata";
* the first spreadsheet row supplies the variable names.
import excel using "datasets/Meta_dataset.xlsx", sheet("Stata") firstrow

*------------------------------------------------------------------------------*
*   General data preparation
*------------------------------------------------------------------------------*
* Convert the string variables that should be numeric to numeric storage.
* destring accepts a whole varlist, so one call handles all of them; a variable
* that is already numeric or holds non-numeric characters is left unchanged.
destring NPL data_year HDI_subnational HDI_national ///
	GDP_pc_national_2010_USD time dist_mean ///
	num_sig_var num_struc num_nb num_env ß SE pvalue, replace

* Replace each string-valued categorical variable with an integer-coded,
* value-labelled copy. The copy is first created as "<name>1" and then takes
* over the original variable name once the string version has been dropped.
foreach catvar in element site_cat region country subnationalstatedistrict {
	local coded `catvar'1
	encode `catvar', generate(`coded')
	drop `catvar'
	rename `coded' `catvar'
}

* Attach descriptive value labels to the categorical variables.
* NOTE(review): element, site_cat and region get their integer codes from
* encode, which numbers categories alphabetically -- confirm that the code-to-
* text mappings below match the raw spreadsheet strings.

label define element_label ///
	1 "Air" ///
	2 "Unclear" ///
	3 "Water" ///
	4 "Soil"
label values element element_label

label define site_cat_label ///
	1 "Hazardous" ///
	2 "Non-hazardous" ///
	3 "Nuclear"
label values site_cat site_cat_label

* Codes 1 and 3 are both displayed as "Other".
label define region_label ///
	1 "Other" ///
	2 "Asia" ///
	3 "Other" ///
	4 "Europe" ///
	5 "North-America"
label values region region_label

label define active_label ///
	0 "Inactive" ///
	1 "Active" ///
	2 "Unclear"
label values active active_label

label define NPL_label ///
	0 "Not on NPL" ///
	1 "On NPL" ///
	2 "Not USA" ///
	3 "Unclear"
label values NPL NPL_label

label define job_label ///
	0 "No employment opportunities" ///
	1 "Employment opportunities" ///
	2 "Unclear"
label values job job_label

label define cleanup_label ///
	0 "Not recognised" ///
	1 "Recognised, no clean-up plan exists" ///
	2 "Clean-up begun, but not finished" ///
	3 "Cleaned-up" ///
	98 "Unclear" ///
	99 "Clean-up not needed"
label values cleanup_stage cleanup_label

label define publish_label ///
	0 "Not published" ///
	1 "Published"
label values publish publish_label


* 0/1 indicator variables derived from the categorical variables
* (cleanup_stage, region, NPL, element, site_cat) for the summary statistics
* and the manual WLS. A logical expression evaluates to 1 when true and 0
* otherwise, so observations with a missing category are coded 0.

* Region indicators, one per region code
generate Africa        = (region == 1)
generate Asia          = (region == 2)
generate Australia     = (region == 3)
generate Europe        = (region == 4)
generate North_America = (region == 5)

* Pooled indicator for region codes 1 and 3 (Africa and Australia)
generate Other_continent = inlist(region, 1, 3)

* NPL status indicators (each is 1 for the matching NPL code, 0 otherwise)
generate NPL_y   = (NPL == 1)
generate NPL_n   = (NPL == 0)
generate not_USA = (NPL == 2)
generate NPL_u   = (NPL == 3)

* Employment-opportunity indicators
generate job_n = (job == 0)
generate job_y = (job == 1)
generate job_u = (job == 2)

* Site-activity indicators
generate active_n = (active == 0)
generate active_y = (active == 1)
generate active_u = (active == 2)

* Site-category indicators (1 for the matching site_cat code, 0 otherwise)
generate haz     = (site_cat == 1)
generate non_haz = (site_cat == 2)
generate nuclear = (site_cat == 3)

* Element indicators (1 for the matching element code, 0 otherwise)
generate air       = (element == 1)
generate element_u = (element == 2)
generate water     = (element == 3)
generate soil      = (element == 4)

* Clean-up stage indicators: cleanup0 ... cleanup3 for stages 0 to 3,
* plus separate indicators for the special codes 98 and 99.
forvalues stage = 0/3 {
	generate cleanup`stage' = (cleanup_stage == `stage')
}
generate cleanup_unclear    = (cleanup_stage == 98)
generate cleanup_not_needed = (cleanup_stage == 99)
* Author's note: non_haz and cleanup_not_needed are perfectly collinear, so
* cleanup_not_needed is omitted.

* Price-conversion indicators built from converter_dich
* (1 for the matching code, 0 otherwise, including when converter_dich is missing).
generate price_converted       = (converter_dich == 1)
generate price_not_converted   = (converter_dich == 0)
generate convertion_not_needed = (converter_dich == 2)

* Keep the original sample size as sample_full and let "sample" hold the
* value divided by 1000.
rename sample sample_full
generate sample = sample_full / 1000

* HDI: use the subnational value, falling back to the national value where the
* subnational one is missing.
generate HDI = cond(HDI_subnational == ., HDI_national, HDI_subnational)

* Time-control indicator: 1 if any of time_disc, time_cont, time_dummy or
* price_converted equals 1, otherwise 0.
generate time_control = (time_disc == 1 | time_cont == 1 | ///
	time_dummy == 1 | price_converted == 1)

* Moderator that is 1 if the estimate comes from an OLS or a spatial model
* (OLS == 1 or spatial == 1), otherwise 0.
generate OLS_spatial = (OLS == 1 | spatial == 1)

* Weighting variables for the manual WLS estimation:
*   precision     = inverse of the elasticity standard error
*   log_precision = natural log of precision
*   elas_var      = squared elasticity standard error
generate precision     = 1 / elas_SE
generate log_precision = ln(precision)
generate elas_var      = elas_SE * elas_SE

* Alternative distance dummy: 1 if dist_mean lies above its sample mean,
* 0 if it is at or below the mean.
* Stata treats a missing value as larger than every number, so an unguarded
* "dist_mean > mean" comparison would flag observations with unknown distance
* as "above the mean". The if-qualifier leaves the dummy missing for them.
* r(mean) is used directly so the comparison uses the full-precision mean
* rather than a copy passed through a local macro.
summarize dist_mean, meanonly
generate dist_greater_mean = (dist_mean > r(mean)) if !missing(dist_mean)

* Mean-centre the continuous variables: for each variable x, create x_c as
* the deviation of x from its sample mean.
local to_center year_publish data_year HDI_national HDI_subnational HDI ///
	GDP_pc_national_2010_USD dist_mean sample num_sig_var num_expl ///
	num_struc num_nb num_env
foreach contvar of local to_center {
	summarize `contvar', meanonly
	generate `contvar'_c = `contvar' - r(mean)
}

* Store the prepared dataset (overwriting any earlier version) and empty memory.
save "datasets/MRA_waste_sites", replace

clear

